{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Import libraries\n",
    "\n",
    "from __future__ import print_function\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import sys\n",
    "from sklearn.preprocessing import LabelBinarizer\n",
    "from six.moves import cPickle as pickle\n",
    "from six.moves import range\n",
    "\n",
    "from timeit import default_timer as timer\n",
    "import librosa\n",
    "\n",
    "# Config the matlotlib backend as plotting inline in IPython\n",
    "%matplotlib inline\n",
    "import seaborn as sns\n",
    "current_palette = sns.color_palette()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature extraction to constant shaped MFCC data\n",
    "\n",
    "We were heavily influence on this technique used by Aqib Saeed\n",
    "[here](https://github.com/aqibsaeed/Urban-Sound-Classification/blob/master/Urban%20Sound%20Classification%20using%20RNN.ipynb)\n",
    " on his blog on GitHub"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "import tarfile\n",
    "t = tarfile.open('UrbanSound8K.tar.gz', mode=\"r:gz\")\n",
    "t.extractall()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>slice_file_name</th>\n",
       "      <th>fsID</th>\n",
       "      <th>start</th>\n",
       "      <th>end</th>\n",
       "      <th>salience</th>\n",
       "      <th>fold</th>\n",
       "      <th>classID</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100032-3-0-0.wav</td>\n",
       "      <td>100032</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.317551</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>dog_bark</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>100263-2-0-117.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>58.5</td>\n",
       "      <td>62.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>100263-2-0-121.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>60.5</td>\n",
       "      <td>64.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>100263-2-0-126.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>63.0</td>\n",
       "      <td>67.000000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100263-2-0-137.wav</td>\n",
       "      <td>100263</td>\n",
       "      <td>68.5</td>\n",
       "      <td>72.500000</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>children_playing</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      slice_file_name    fsID  start        end  salience  fold  classID  \\\n",
       "0    100032-3-0-0.wav  100032    0.0   0.317551         1     5        3   \n",
       "1  100263-2-0-117.wav  100263   58.5  62.500000         1     5        2   \n",
       "2  100263-2-0-121.wav  100263   60.5  64.500000         1     5        2   \n",
       "3  100263-2-0-126.wav  100263   63.0  67.000000         1     5        2   \n",
       "4  100263-2-0-137.wav  100263   68.5  72.500000         1     5        2   \n",
       "\n",
       "              class  \n",
       "0          dog_bark  \n",
       "1  children_playing  \n",
       "2  children_playing  \n",
       "3  children_playing  \n",
       "4  children_playing  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_sound = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')\n",
    "raw_sound.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "fold_list = ['fold1', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7', 'fold8', 'fold9', 'fold10']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from python_speech_features import logfbank\n",
    "bands, frames = 20, 41\n",
    "window_size = 512 * (frames - 1)\n",
    "def windows(data, window_size):\n",
    "    start = 0\n",
    "    while start < len(data):\n",
    "        yield start, start + window_size\n",
    "        start += (window_size / 2)\n",
    "\n",
    "        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "sound_clip, s = librosa.load('UrbanSound8K/audio/fold1/101415-3-0-2.wav')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:2: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n",
      "  from ipykernel import kernelapp as app\n",
      "C:\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:3: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n",
      "  app.launch_new_instance()\n"
     ]
    }
   ],
   "source": [
    "# set up window size\n",
    "for (start,end) in windows(sound_clip,window_size):\n",
    "    if(len(sound_clip[start:end]) == window_size):\n",
    "        signal = sound_clip[start:end]\n",
    "        mfcc = librosa.feature.mfcc(y=signal, sr=s, n_mfcc = bands).T.flatten()[:, np.newaxis].T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ".DS_Store\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:15: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n",
      "C:\\Anaconda3\\lib\\site-packages\\ipykernel\\__main__.py:16: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ".DS_Store\n",
      ".DS_Store\n",
      ".DS_Store\n",
      ".DS_Store\n",
      ".DS_Store\n",
      ".DS_Store\n",
      ".DS_Store\n",
      ".DS_Store\n",
      ".DS_Store\n",
      "Exceptions:  10\n"
     ]
    }
   ],
   "source": [
    "# for each file split into 20 segments, take MFCC data from each segment.\n",
    "from os import listdir, system\n",
    "from os.path import isfile, join\n",
    "window_size = 512 * (frames - 1)\n",
    "mfcc_data = []\n",
    "info = []\n",
    "exception_count = 0\n",
    "for i in range(10):\n",
    "    mypath = 'UrbanSound8K/audio/'+ fold_list[i] + '/'\n",
    "    files = [f for f in listdir(mypath) if isfile(join(mypath, f))]\n",
    "    for f in files:\n",
    "        try:\n",
    "            fn = mypath + f\n",
    "            sound_clip, s = librosa.load(fn)\n",
    "            for (start,end) in windows(sound_clip,window_size):\n",
    "                if(len(sound_clip[start:end]) == window_size):\n",
    "                    signal = sound_clip[start:end]\n",
    "                    mfcc = librosa.feature.mfcc(y=signal, sr=s, n_mfcc = bands).T.flatten()[:, np.newaxis].T\n",
    "\n",
    "\n",
    "        except:\n",
    "            print(f)\n",
    "            exception_count += 1\n",
    "            continue\n",
    "\n",
    "        l_row = raw_sound.loc[raw_sound['slice_file_name']==f].values.tolist()\n",
    "        label = l_row[0][-1]\n",
    "        \n",
    "        fold = i+1\n",
    "        mfcc_data.append(mfcc)\n",
    "        info.append([s, label, fold])\n",
    "print(\"Exceptions: \", exception_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# .DS_Store is a default file from Mac file systems unrelated to the Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8732, 20, 41)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# can't store 3d arrays in dataframe, but this is the shape we will be useing to classify on.\n",
    "#features = np.asarray(mfcc_data).reshape(len(mfcc_data),bands,frames)\n",
    "#features.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "feat_pd = pd.DataFrame(np.array(mfcc_data).reshape(len(mfcc_data), 820))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8732, 820)"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feat_pd.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "feat_pd['label'] = np.array(info)[:,1]\n",
    "feat_pd['fold'] = np.array(info)[:,2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pickle.dump(feat_pd, open('const_shape.p', 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print('done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
